from pyspark import SparkContext
from pyspark.sql import SQLContext
import pandas as pd
# Spin up Spark and load the two CSVs as Spark DataFrames.
# header=True uses the first row for column names; with no schema
# inference requested, every column is read as a string.
sc = SparkContext()
spark = SQLContext(sc)
df11 = spark.read.csv(r'C:\Users\Wang\Desktop\data\stock_small.csv', header=True)
df12 = spark.read.csv(r'C:\Users\Wang\Desktop\data\dividends_small.csv', header=True)

df2 = df12[df12['symbol'] == "IBM"]    # rows of dividends_small where symbol is IBM
df1 = df11[df11['stock_symbol'] == "IBM"]

# BUG FIX: pd.merge() only accepts pandas objects, but df1/df2 are Spark
# DataFrames, so the original call raised
# "TypeError: Can only merge Series or DataFrame objects".
# Convert each side with .toPandas() first; .iloc below then works too.
df = pd.merge(df1.toPandas(), df2.toPandas(),
              left_on='stock_symbol', right_on='symbol')

# Keep positional columns 1, 2 and 6 of the merged frame.
df = df.iloc[:, [1, 2, 6]]
print(df)
